mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-20 16:50:39 +00:00
initial changes for soniox (#270)
* initial changes for soniox * changes to gather for soniox * parse soniox stt results * handle <end> token for soniox * soniox: handle empty array of words * support for soniox hints * add soniox storage options * update to verb specs * add support for transcribe * compile soniox transcripts * gather: kill no input timer for soniox when we get interim results * fix buffering of soniox transcripts * fix for compiling soniox transcript * another fix for compiling soniox transcript * another fix * handling of <end> token * fix soniox bug * gather: fixes for soniox continuous asr * fix undefined variable reference * fix prev commit * bugfix: allow verb_status requests * gather: for soniox no need to restart transcription after final transcription received * update verb specs * update verb specs, fixes for continuous asr:
This commit is contained in:
@@ -7,6 +7,7 @@ const {
|
||||
AwsTranscriptionEvents,
|
||||
AzureTranscriptionEvents,
|
||||
DeepgramTranscriptionEvents,
|
||||
SonioxTranscriptionEvents,
|
||||
IbmTranscriptionEvents,
|
||||
NvidiaTranscriptionEvents
|
||||
} = require('../utils/constants');
|
||||
@@ -33,11 +34,13 @@ class TaskGather extends Task {
|
||||
setChannelVarsForStt,
|
||||
normalizeTranscription,
|
||||
removeSpeechListeners,
|
||||
setSpeechCredentialsAtRuntime
|
||||
setSpeechCredentialsAtRuntime,
|
||||
compileSonioxTranscripts
|
||||
} = require('../utils/transcription-utils')(logger);
|
||||
this.setChannelVarsForStt = setChannelVarsForStt;
|
||||
this.normalizeTranscription = normalizeTranscription;
|
||||
this.removeSpeechListeners = removeSpeechListeners;
|
||||
this.compileSonioxTranscripts = compileSonioxTranscripts;
|
||||
|
||||
[
|
||||
'finishOnKey', 'input', 'numDigits', 'minDigits', 'maxDigits',
|
||||
@@ -85,6 +88,9 @@ class TaskGather extends Task {
|
||||
/* buffer speech for continuous asr */
|
||||
this._bufferedTranscripts = [];
|
||||
|
||||
/* buffer for soniox transcripts */
|
||||
this._sonioxTranscripts = [];
|
||||
|
||||
this.parentTask = parentTask;
|
||||
}
|
||||
|
||||
@@ -288,6 +294,7 @@ class TaskGather extends Task {
|
||||
this._killAudio(cs);
|
||||
this.ep.removeAllListeners('dtmf');
|
||||
clearTimeout(this.interDigitTimer);
|
||||
this._clearAsrTimer();
|
||||
this.playTask?.span.end();
|
||||
this.sayTask?.span.end();
|
||||
this._resolve('killed');
|
||||
@@ -389,6 +396,13 @@ class TaskGather extends Task {
|
||||
this._onDeepGramConnectFailure.bind(this, cs, ep));
|
||||
break;
|
||||
|
||||
case 'soniox':
|
||||
this.bugname = 'soniox_transcribe';
|
||||
ep.addCustomEventListener(SonioxTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(SonioxTranscriptionEvents.Error,
|
||||
this._onSonioxError.bind(this, cs, ep));
|
||||
break;
|
||||
|
||||
case 'ibm':
|
||||
this.bugname = 'ibm_transcribe';
|
||||
ep.addCustomEventListener(IbmTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
@@ -523,7 +537,7 @@ class TaskGather extends Task {
|
||||
// make sure this is not a transcript from answering machine detection
|
||||
const bugname = fsEvent.getHeader('media-bugname');
|
||||
const finished = fsEvent.getHeader('transcription-session-finished');
|
||||
this.logger.debug({evt, bugname, finished}, 'Gather:_onTranscription');
|
||||
this.logger.debug({evt, bugname, finished}, `Gather:_onTranscription for vendor ${this.vendor}`);
|
||||
if (bugname && this.bugname !== bugname) return;
|
||||
|
||||
if (this.vendor === 'ibm') {
|
||||
@@ -544,9 +558,8 @@ class TaskGather extends Task {
|
||||
|
||||
/* count words for bargein feature */
|
||||
const words = evt.alternatives[0]?.transcript.split(' ').length;
|
||||
const bufferedWords = this._bufferedTranscripts.reduce((count, e) => {
|
||||
return count + e.alternatives[0]?.transcript.split(' ').length;
|
||||
}, 0);
|
||||
const bufferedWords = this._sonioxTranscripts.length +
|
||||
this._bufferedTranscripts.reduce((count, e) => count + e.alternatives[0]?.transcript.split(' ').length, 0);
|
||||
|
||||
if (evt.is_final) {
|
||||
if (evt.alternatives[0].transcript === '' && !this.callSession.callGone && !this.killed) {
|
||||
@@ -555,7 +568,6 @@ class TaskGather extends Task {
|
||||
}
|
||||
else {
|
||||
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, continue listening');
|
||||
//this._startTranscribing(ep);
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -579,7 +591,9 @@ class TaskGather extends Task {
|
||||
return this._resolve(this._bufferedTranscripts.length > 0 ? 'speech' : 'timeout');
|
||||
}
|
||||
this._startAsrTimer();
|
||||
return this._startTranscribing(ep);
|
||||
|
||||
/* some STT engines will keep listening after a final response, so no need to restart */
|
||||
if (!['soniox', 'aws', 'microsoft', 'deepgram'].includes(this.vendor)) this._startTranscribing(ep);
|
||||
}
|
||||
else {
|
||||
if (this.bargein && (words + bufferedWords) < this.minBargeinWordCount) {
|
||||
@@ -590,6 +604,12 @@ class TaskGather extends Task {
|
||||
return;
|
||||
}
|
||||
else {
|
||||
if (this.vendor === 'soniox') {
|
||||
/* compile transcripts into one */
|
||||
this._sonioxTranscripts.push(evt.vendor.finalWords);
|
||||
evt = this.compileSonioxTranscripts(this._sonioxTranscripts, 1, this.language);
|
||||
this._sonioxTranscripts = [];
|
||||
}
|
||||
this._resolve('speech', evt);
|
||||
}
|
||||
}
|
||||
@@ -613,6 +633,13 @@ class TaskGather extends Task {
|
||||
this.cs.requestor.request('verb:hook', this.partialResultHook, Object.assign({speech: evt},
|
||||
this.cs.callInfo, httpHeaders));
|
||||
}
|
||||
if (this.vendor === 'soniox') {
|
||||
this._clearTimer();
|
||||
if (evt.vendor.finalWords.length) {
|
||||
this.logger.debug({evt}, 'TaskGather:_onTranscription - buffering soniox transcript');
|
||||
this._sonioxTranscripts.push(evt.vendor.finalWords);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_onEndOfUtterance(cs, ep) {
|
||||
@@ -643,6 +670,9 @@ class TaskGather extends Task {
|
||||
return this._resolve('timeout');
|
||||
}
|
||||
}
|
||||
_onSonioxError(cs, ep, evt) {
|
||||
this.logger.info({evt}, 'TaskGather:_onSonioxError');
|
||||
}
|
||||
_onNvidiaError(cs, ep, evt) {
|
||||
this.logger.info({evt}, 'TaskGather:_onNvidiaError');
|
||||
}
|
||||
|
||||
@@ -3,10 +3,11 @@ const {
|
||||
TaskName,
|
||||
TaskPreconditions,
|
||||
GoogleTranscriptionEvents,
|
||||
AzureTranscriptionEvents,
|
||||
AwsTranscriptionEvents,
|
||||
NuanceTranscriptionEvents,
|
||||
AwsTranscriptionEvents,
|
||||
AzureTranscriptionEvents,
|
||||
DeepgramTranscriptionEvents,
|
||||
SonioxTranscriptionEvents,
|
||||
IbmTranscriptionEvents,
|
||||
NvidiaTranscriptionEvents
|
||||
} = require('../utils/constants');
|
||||
@@ -195,7 +196,12 @@ class TaskTranscribe extends Task {
|
||||
ep.addCustomEventListener(DeepgramTranscriptionEvents.ConnectFailure,
|
||||
this._onDeepGramConnectFailure.bind(this, cs, ep, channel));
|
||||
break;
|
||||
|
||||
case 'soniox':
|
||||
this.bugname = 'soniox_transcribe';
|
||||
ep.addCustomEventListener(SonioxTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(SonioxTranscriptionEvents.Error,
|
||||
this._onSonioxError.bind(this, cs, ep));
|
||||
break;
|
||||
case 'ibm':
|
||||
this.bugname = 'ibm_transcribe';
|
||||
ep.addCustomEventListener(IbmTranscriptionEvents.Transcription,
|
||||
@@ -326,8 +332,11 @@ class TaskTranscribe extends Task {
|
||||
return this._resolve('timeout');
|
||||
}
|
||||
}
|
||||
_onSonioxError(cs, ep, evt) {
|
||||
this.logger.info({evt}, 'TaskTranscribe:_onSonioxError');
|
||||
}
|
||||
_onNvidiaError(cs, ep, evt) {
|
||||
this.logger.info({evt}, 'TaskGather:_onNvidiaError');
|
||||
this.logger.info({evt}, 'TaskTranscribe:_onNvidiaError');
|
||||
}
|
||||
_onDeepgramConnect(_cs, _ep) {
|
||||
this.logger.debug('TaskTranscribe:_onDeepgramConnect');
|
||||
@@ -365,7 +374,7 @@ class TaskTranscribe extends Task {
|
||||
this.notifyTaskDone();
|
||||
}
|
||||
_onIbmError(cs, _ep, _channel, evt) {
|
||||
this.logger.info({evt}, 'TaskGather:_onIbmError');
|
||||
this.logger.info({evt}, 'TaskTranscribe:_onIbmError');
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user