Speech vendor/cobalt (#463)

* initial changes for cobalt speech

* wip

* wip

* update to drachtio-fsmrf that supports cobalt

* update to verb-specifications with cobalt speech support

* more wip

* lint

* use node 18 with gh actions

* support for compiling cobalt hints

* fix bug in uuid_cobalt_compile_context

* update verb-specifications

* remove repeated code

* cobalt support for transcribe

* update to verb specs
This commit is contained in:
Dave Horton
2023-09-13 09:47:30 -04:00
committed by GitHub
parent d220733dea
commit a1793ac359
11 changed files with 204 additions and 24 deletions

View File

@@ -6,6 +6,7 @@ const {
AzureTranscriptionEvents,
DeepgramTranscriptionEvents,
SonioxTranscriptionEvents,
CobaltTranscriptionEvents,
IbmTranscriptionEvents,
NvidiaTranscriptionEvents,
JambonzTranscriptionEvents
@@ -187,12 +188,18 @@ class TaskGather extends SttTask {
}
}
/* when using cobalt model is required */
if (this.vendor === 'cobalt' && !this.data.recognizer.model) {
this.notifyError({ msg: 'ASR error', details:'Cobalt requires a model to be specified'});
throw new Error('Cobalt requires a model to be specified');
}
const startListening = async(cs, ep) => {
this._startTimer();
if (this.isContinuousAsr && 0 === this.timeout) this._startAsrTimer();
if (this.input.includes('speech') && !this.listenDuringPrompt) {
try {
await this._initSpeech(cs, ep);
await this._setSpeechHandlers(cs, ep);
if (this.killed) {
this.logger.info('Gather:exec - task was quickly killed so do not transcribe');
return;
@@ -260,7 +267,7 @@ class TaskGather extends SttTask {
}
if (this.input.includes('speech') && this.listenDuringPrompt) {
await this._initSpeech(cs, ep);
await this._setSpeechHandlers(cs, ep);
this._startTranscribing(ep);
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid)
.catch(() => {/*already logged error */});
@@ -334,7 +341,9 @@ class TaskGather extends SttTask {
}
}
async _initSpeech(cs, ep) {
async _setSpeechHandlers(cs, ep) {
if (this._speechHandlersSet) return;
this._speechHandlersSet = true;
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
switch (this.vendor) {
case 'google':
@@ -387,6 +396,28 @@ class TaskGather extends SttTask {
ep.addCustomEventListener(SonioxTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
break;
case 'cobalt':
this.bugname = 'cobalt_speech';
ep.addCustomEventListener(CobaltTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
/* special case: if using hints with cobalt we need to compile them */
if (this.vendor === 'cobalt' && opts.COBALT_SPEECH_HINTS) {
try {
const context = await this.compileHintsForCobalt(
ep,
opts.COBALT_SERVER_URI,
this.data.recognizer.model,
opts.COBALT_CONTEXT_TOKEN,
opts.COBALT_SPEECH_HINTS
);
if (context) opts.COBALT_COMPILED_CONTEXT_DATA = context;
delete opts.COBALT_SPEECH_HINTS;
} catch (err) {
this.logger.error({err}, 'Error compiling hints for cobalt');
}
}
break;
case 'ibm':
this.bugname = 'ibm_transcribe';
ep.addCustomEventListener(IbmTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));