Feature/ibm watson (#193)

* initial changes to support ibm watson

* update specs.json for ibm

* update to drachtio-fsmrf with support for ibm

* bugfix: set access token for ibm stt, not api_key

* fix name of api_key

* normalize ibm transcription results

* rework ibm credentials

* bugfix setting runtime speech creds

* bugfix: ibm region

* typo

* changes to transcribe for ibm watson

* implement connect handler

* bugfix: bind error

* proper use of result_index

* ibm error handling
This commit is contained in:
Dave Horton
2022-11-21 22:09:37 -05:00
committed by GitHub
parent 8686348454
commit 71a2435c63
9 changed files with 217 additions and 51 deletions

View File

@@ -6,7 +6,8 @@ const {
NuanceTranscriptionEvents,
AwsTranscriptionEvents,
AzureTranscriptionEvents,
DeepgramTranscriptionEvents
DeepgramTranscriptionEvents,
IbmTranscriptionEvents
} = require('../utils/constants');
const makeTask = require('./make_task');
@@ -30,7 +31,8 @@ class TaskGather extends Task {
const {
setChannelVarsForStt,
normalizeTranscription,
removeSpeechListeners
removeSpeechListeners,
setSpeechCredentialsAtRuntime
} = require('../utils/transcription-utils')(logger);
this.setChannelVarsForStt = setChannelVarsForStt;
this.normalizeTranscription = normalizeTranscription;
@@ -56,14 +58,7 @@ class TaskGather extends Task {
this.language = recognizer.language;
/* let credentials be supplied in the recognizer object at runtime */
if (recognizer.vendor === 'nuance') {
const {clientId, secret} = recognizer.nuanceOptions;
if (clientId && secret) this.sttCredentials = {client_id: clientId, secret};
}
else if (recognizer.vendor === 'deepgram') {
const {apiKey} = recognizer.deepgramOptions;
if (apiKey) this.sttCredentials = {api_key: apiKey};
}
this.sttCredentials = setSpeechCredentialsAtRuntime(recognizer);
/* continuous ASR (i.e. compile transcripts until a special timeout or dtmf key) */
this.asrTimeout = typeof recognizer.asrTimeout === 'number' ? recognizer.asrTimeout * 1000 : 0;
@@ -120,7 +115,7 @@ class TaskGather extends Task {
this.logger.debug('Gather:exec');
await super.exec(cs);
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
const {getNuanceAccessToken} = cs.srf.locals.dbHelpers;
const {getNuanceAccessToken, getIbmAccessToken} = cs.srf.locals.dbHelpers;
if (cs.hasGlobalSttHints) {
const {hints, hintsBoost} = cs.globalSttHints;
@@ -173,6 +168,13 @@ class TaskGather extends Task {
this.logger.debug({client_id}, `Gather:exec - got nuance access token ${servedFromCache ? 'from cache' : ''}`);
this.sttCredentials = {...this.sttCredentials, access_token};
}
else if (this.vendor == 'ibm' && this.sttCredentials.stt_api_key) {
/* get ibm access token */
const {stt_api_key, stt_region} = this.sttCredentials;
const {access_token, servedFromCache} = await getIbmAccessToken(stt_api_key);
this.logger.debug({stt_api_key}, `Gather:exec - got ibm access token ${servedFromCache ? 'from cache' : ''}`);
this.sttCredentials = {...this.sttCredentials, access_token, stt_region};
}
const startListening = (cs, ep) => {
this._startTimer();
if (this.isContinuousAsr && 0 === this.timeout) this._startAsrTimer();
@@ -352,6 +354,16 @@ class TaskGather extends Task {
this._onDeepGramConnectFailure.bind(this, cs, ep));
break;
case 'ibm':
this.bugname = 'ibm_transcribe';
ep.addCustomEventListener(IbmTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(IbmTranscriptionEvents.Connect, this._onIbmConnect.bind(this, cs, ep));
ep.addCustomEventListener(IbmTranscriptionEvents.ConnectFailure,
this._onIbmConnectFailure.bind(this, cs, ep));
ep.addCustomEventListener(IbmTranscriptionEvents.Error,
this._onIbmError.bind(this, cs, ep));
break;
default:
throw new Error(`Invalid vendor ${this.vendor}`);
}
@@ -579,6 +591,27 @@ class TaskGather extends Task {
this.notifyTaskDone();
}
_onIbmConnect(_cs, _ep) {
this.logger.debug('TaskGather:_onIbmConnect');
}
_onIbmConnectFailure(cs, _ep, evt) {
const {reason} = evt;
const {writeAlerts, AlertType} = cs.srf.locals;
this.logger.info({evt}, 'TaskGather:_onIbmConnectFailure');
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: `Failed connecting to IBM watson speech recognizer: ${reason}`,
vendor: 'ibm',
}).catch((err) => this.logger.info({err}, 'Error generating alert for IBM connection failure'));
this.notifyError(`Failed connecting to speech vendor IBM: ${reason}`);
this.notifyTaskDone();
}
_onIbmError(cs, _ep, evt) {
this.logger.info({evt}, 'TaskGather:_onIbmError'); }
_onVadDetected(cs, ep) {
if (this.bargein && this.minBargeinWordCount === 0) {
this.logger.debug('TaskGather:_onVadDetected');

View File

@@ -424,7 +424,7 @@
"properties": {
"vendor": {
"type": "string",
"enum": ["google", "aws", "polly", "microsoft", "default"]
"enum": ["google", "aws", "polly", "microsoft", "nuance", "ibm", "default"]
},
"language": "string",
"voice": "string",
@@ -445,7 +445,7 @@
"properties": {
"vendor": {
"type": "string",
"enum": ["google", "aws", "microsoft", "nuance", "deepgram", "default"]
"enum": ["google", "aws", "microsoft", "nuance", "deepgram", "ibm", "default"]
},
"language": "string",
"vad": "#vad",
@@ -511,12 +511,30 @@
"asrDtmfTerminationDigit": "string",
"asrTimeout": "number",
"nuanceOptions": "#nuanceOptions",
"deepgramOptions": "#deepgramOptions"
"deepgramOptions": "#deepgramOptions",
"ibmOptions": "#ibmOptions"
},
"required": [
"vendor"
]
},
"ibmOptions": {
"properties": {
"sttApiKey": "string",
"sttRegion": "string",
"ttsApiKey": "string",
"ttsRegion": "string",
"instanceId": "string",
"model": "string",
"languageCustomizationId": "string",
"acousticCustomizationId": "string",
"baseModelVersion": "string",
"watsonMetadata": "string",
"watsonLearningOptOut": "boolean"
},
"required": [
]
},
"deepgramOptions": {
"properties": {
"apiKey": "string",

View File

@@ -6,7 +6,8 @@ const {
AzureTranscriptionEvents,
AwsTranscriptionEvents,
NuanceTranscriptionEvents,
DeepgramTranscriptionEvents
DeepgramTranscriptionEvents,
IbmTranscriptionEvents
} = require('../utils/constants');
const normalizeJambones = require('../utils/normalize-jambones');
@@ -19,7 +20,8 @@ class TaskTranscribe extends Task {
const {
setChannelVarsForStt,
normalizeTranscription,
removeSpeechListeners
removeSpeechListeners,
setSpeechCredentialsAtRuntime
} = require('../utils/transcription-utils')(logger);
this.setChannelVarsForStt = setChannelVarsForStt;
this.normalizeTranscription = normalizeTranscription;
@@ -35,16 +37,7 @@ class TaskTranscribe extends Task {
this.separateRecognitionPerChannel = recognizer.separateRecognitionPerChannel;
/* let credentials be supplied in the recognizer object at runtime */
if (recognizer.vendor === 'nuance') {
const {clientId, secret} = recognizer.nuanceOptions;
if (clientId && secret) {
this.sttCredentials = {client_id: clientId, secret};
}
}
else if (recognizer.vendor === 'deepgram') {
const {apiKey} = recognizer.deepgramOptions;
if (apiKey) this.sttCredentials = {api_key: apiKey};
}
this.sttCredentials = setSpeechCredentialsAtRuntime(recognizer);
recognizer.hints = recognizer.hints || [];
recognizer.altLanguages = recognizer.altLanguages || [];
@@ -55,7 +48,7 @@ class TaskTranscribe extends Task {
async exec(cs, {ep, ep2}) {
super.exec(cs);
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
const {getNuanceAccessToken} = cs.srf.locals.dbHelpers;
const {getNuanceAccessToken, getIbmAccessToken} = cs.srf.locals.dbHelpers;
if (cs.hasGlobalSttHints) {
const {hints, hintsBoost} = cs.globalSttHints;
@@ -102,7 +95,13 @@ class TaskTranscribe extends Task {
`Transcribe:exec - got nuance access token ${servedFromCache ? 'from cache' : ''}`);
this.sttCredentials = {...this.sttCredentials, access_token};
}
else if (this.vendor == 'ibm' && this.sttCredentials.stt_api_key) {
/* get ibm access token */
const {stt_api_key, stt_region} = this.sttCredentials;
const {access_token, servedFromCache} = await getIbmAccessToken(stt_api_key);
this.logger.debug({stt_api_key}, `Gather:exec - got ibm access token ${servedFromCache ? 'from cache' : ''}`);
this.sttCredentials = {...this.sttCredentials, access_token, stt_region};
}
await this._startTranscribing(cs, ep, 1);
if (this.separateRecognitionPerChannel && ep2) {
await this._startTranscribing(cs, ep2, 2);
@@ -189,6 +188,19 @@ class TaskTranscribe extends Task {
ep.addCustomEventListener(DeepgramTranscriptionEvents.ConnectFailure,
this._onDeepGramConnectFailure.bind(this, cs, ep, channel));
break;
case 'ibm':
this.bugname = 'ibm_transcribe';
ep.addCustomEventListener(IbmTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
ep.addCustomEventListener(IbmTranscriptionEvents.Connect,
this._onIbmConnect.bind(this, cs, ep, channel));
ep.addCustomEventListener(IbmTranscriptionEvents.ConnectFailure,
this._onIbmConnectFailure.bind(this, cs, ep, channel));
ep.addCustomEventListener(IbmTranscriptionEvents.Error,
this._onIbmError.bind(this, cs, ep, channel));
break;
default:
throw new Error(`Invalid vendor ${this.vendor}`);
}
@@ -279,7 +291,7 @@ class TaskTranscribe extends Task {
this._timer = null;
}
}
_onNuanceError(_cs, _ep, evt) {
_onNuanceError(_cs, _ep, _channel, evt) {
const {code, error, details} = evt;
if (code === 404 && error === 'No speech') {
this.logger.debug({code, error, details}, 'TaskTranscribe:_onNuanceError');
@@ -294,7 +306,7 @@ class TaskTranscribe extends Task {
this.logger.debug('TaskTranscribe:_onDeepgramConnect');
}
_onDeepGramConnectFailure(cs, _ep, evt) {
_onDeepGramConnectFailure(cs, _ep, _channel, evt) {
const {reason} = evt;
const {writeAlerts, AlertType} = cs.srf.locals;
this.logger.info({evt}, 'TaskTranscribe:_onDeepgramConnectFailure');
@@ -307,6 +319,29 @@ class TaskTranscribe extends Task {
this.notifyError(`Failed connecting to speech vendor deepgram: ${reason}`);
this.notifyTaskDone();
}
_onIbmConnect(_cs, _ep) {
this.logger.debug('TaskTranscribe:_onIbmConnect');
}
_onIbmConnectFailure(cs, _ep, _channel, evt) {
const {reason} = evt;
const {writeAlerts, AlertType} = cs.srf.locals;
this.logger.info({evt}, 'TaskTranscribe:_onIbmConnectFailure');
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: `Failed connecting to IBM watson speech recognizer: ${reason}`,
vendor: 'ibm',
}).catch((err) => this.logger.info({err}, 'Error generating alert for IBM connection failure'));
this.notifyError(`Failed connecting to speech vendor IBM: ${reason}`);
this.notifyTaskDone();
}
_onIbmError(cs, _ep, _channel, evt) {
this.logger.info({evt}, 'TaskGather:_onIbmError');
}
}
module.exports = TaskTranscribe;