initial changes for soniox (#270)

* initial changes for soniox

* changes to gather for soniox

* parse soniox stt results

* handle <end> token for soniox

* soniox: handle empty array of words

* support for soniox hints

* add soniox storage options

* update to verb specs

* add support for transcribe

* compile soniox transcripts

* gather: kill no input timer for soniox when we get interim results

* fix buffering of soniox transcripts

* fix for compiling soniox transcript

* another fix for compiling soniox transcript

* another fix

* handling of <end> token

* fix soniox bug

* gather: fixes for soniox continous asr

* fix undefined variable reference

* fix prev commit

* bugfix: allow verb_status requests

* gather: for soniox no need to restart transcription after final transcription received

* update verb specs

* update verb specs, fixes for continuous asr:
This commit is contained in:
Dave Horton
2023-03-03 13:37:55 -05:00
committed by GitHub
parent ab1947e23e
commit 1c683f1142
10 changed files with 272 additions and 33 deletions

View File

@@ -7,6 +7,7 @@ const {
AwsTranscriptionEvents,
AzureTranscriptionEvents,
DeepgramTranscriptionEvents,
SonioxTranscriptionEvents,
IbmTranscriptionEvents,
NvidiaTranscriptionEvents
} = require('../utils/constants');
@@ -33,11 +34,13 @@ class TaskGather extends Task {
setChannelVarsForStt,
normalizeTranscription,
removeSpeechListeners,
setSpeechCredentialsAtRuntime
setSpeechCredentialsAtRuntime,
compileSonioxTranscripts
} = require('../utils/transcription-utils')(logger);
this.setChannelVarsForStt = setChannelVarsForStt;
this.normalizeTranscription = normalizeTranscription;
this.removeSpeechListeners = removeSpeechListeners;
this.compileSonioxTranscripts = compileSonioxTranscripts;
[
'finishOnKey', 'input', 'numDigits', 'minDigits', 'maxDigits',
@@ -85,6 +88,9 @@ class TaskGather extends Task {
/* buffer speech for continuous asr */
this._bufferedTranscripts = [];
/* buffer for soniox transcripts */
this._sonioxTranscripts = [];
this.parentTask = parentTask;
}
@@ -288,6 +294,7 @@ class TaskGather extends Task {
this._killAudio(cs);
this.ep.removeAllListeners('dtmf');
clearTimeout(this.interDigitTimer);
this._clearAsrTimer();
this.playTask?.span.end();
this.sayTask?.span.end();
this._resolve('killed');
@@ -389,6 +396,13 @@ class TaskGather extends Task {
this._onDeepGramConnectFailure.bind(this, cs, ep));
break;
case 'soniox':
this.bugname = 'soniox_transcribe';
ep.addCustomEventListener(SonioxTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(SonioxTranscriptionEvents.Error,
this._onSonioxError.bind(this, cs, ep));
break;
case 'ibm':
this.bugname = 'ibm_transcribe';
ep.addCustomEventListener(IbmTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
@@ -523,7 +537,7 @@ class TaskGather extends Task {
// make sure this is not a transcript from answering machine detection
const bugname = fsEvent.getHeader('media-bugname');
const finished = fsEvent.getHeader('transcription-session-finished');
this.logger.debug({evt, bugname, finished}, 'Gather:_onTranscription');
this.logger.debug({evt, bugname, finished}, `Gather:_onTranscription for vendor ${this.vendor}`);
if (bugname && this.bugname !== bugname) return;
if (this.vendor === 'ibm') {
@@ -544,9 +558,8 @@ class TaskGather extends Task {
/* count words for bargein feature */
const words = evt.alternatives[0]?.transcript.split(' ').length;
const bufferedWords = this._bufferedTranscripts.reduce((count, e) => {
return count + e.alternatives[0]?.transcript.split(' ').length;
}, 0);
const bufferedWords = this._sonioxTranscripts.length +
this._bufferedTranscripts.reduce((count, e) => count + e.alternatives[0]?.transcript.split(' ').length, 0);
if (evt.is_final) {
if (evt.alternatives[0].transcript === '' && !this.callSession.callGone && !this.killed) {
@@ -555,7 +568,6 @@ class TaskGather extends Task {
}
else {
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, continue listening');
//this._startTranscribing(ep);
}
return;
}
@@ -579,7 +591,9 @@ class TaskGather extends Task {
return this._resolve(this._bufferedTranscripts.length > 0 ? 'speech' : 'timeout');
}
this._startAsrTimer();
return this._startTranscribing(ep);
/* some STT engines will keep listening after a final response, so no need to restart */
if (!['soniox', 'aws', 'microsoft', 'deepgram'].includes(this.vendor)) this._startTranscribing(ep);
}
else {
if (this.bargein && (words + bufferedWords) < this.minBargeinWordCount) {
@@ -590,6 +604,12 @@ class TaskGather extends Task {
return;
}
else {
if (this.vendor === 'soniox') {
/* compile transcripts into one */
this._sonioxTranscripts.push(evt.vendor.finalWords);
evt = this.compileSonioxTranscripts(this._sonioxTranscripts, 1, this.language);
this._sonioxTranscripts = [];
}
this._resolve('speech', evt);
}
}
@@ -613,6 +633,13 @@ class TaskGather extends Task {
this.cs.requestor.request('verb:hook', this.partialResultHook, Object.assign({speech: evt},
this.cs.callInfo, httpHeaders));
}
if (this.vendor === 'soniox') {
this._clearTimer();
if (evt.vendor.finalWords.length) {
this.logger.debug({evt}, 'TaskGather:_onTranscription - buffering soniox transcript');
this._sonioxTranscripts.push(evt.vendor.finalWords);
}
}
}
}
_onEndOfUtterance(cs, ep) {
@@ -643,6 +670,9 @@ class TaskGather extends Task {
return this._resolve('timeout');
}
}
_onSonioxError(cs, ep, evt) {
this.logger.info({evt}, 'TaskGather:_onSonioxError');
}
_onNvidiaError(cs, ep, evt) {
this.logger.info({evt}, 'TaskGather:_onNvidiaError');
}

View File

@@ -3,10 +3,11 @@ const {
TaskName,
TaskPreconditions,
GoogleTranscriptionEvents,
AzureTranscriptionEvents,
AwsTranscriptionEvents,
NuanceTranscriptionEvents,
AwsTranscriptionEvents,
AzureTranscriptionEvents,
DeepgramTranscriptionEvents,
SonioxTranscriptionEvents,
IbmTranscriptionEvents,
NvidiaTranscriptionEvents
} = require('../utils/constants');
@@ -195,7 +196,12 @@ class TaskTranscribe extends Task {
ep.addCustomEventListener(DeepgramTranscriptionEvents.ConnectFailure,
this._onDeepGramConnectFailure.bind(this, cs, ep, channel));
break;
case 'soniox':
this.bugname = 'soniox_transcribe';
ep.addCustomEventListener(SonioxTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(SonioxTranscriptionEvents.Error,
this._onSonioxError.bind(this, cs, ep));
break;
case 'ibm':
this.bugname = 'ibm_transcribe';
ep.addCustomEventListener(IbmTranscriptionEvents.Transcription,
@@ -326,8 +332,11 @@ class TaskTranscribe extends Task {
return this._resolve('timeout');
}
}
_onSonioxError(cs, ep, evt) {
this.logger.info({evt}, 'TaskTranscribe:_onSonioxError');
}
_onNvidiaError(cs, ep, evt) {
this.logger.info({evt}, 'TaskGather:_onNvidiaError');
this.logger.info({evt}, 'TaskTranscribe:_onNvidiaError');
}
_onDeepgramConnect(_cs, _ep) {
this.logger.debug('TaskTranscribe:_onDeepgramConnect');
@@ -365,7 +374,7 @@ class TaskTranscribe extends Task {
this.notifyTaskDone();
}
_onIbmError(cs, _ep, _channel, evt) {
this.logger.info({evt}, 'TaskGather:_onIbmError');
this.logger.info({evt}, 'TaskTranscribe:_onIbmError');
}