feat: rest:dial amd (#339)

Add support for sending the 'amd' property in the createCall REST API, and add support for using any of the speech vendors for STT
---------

Co-authored-by: Dave Horton <daveh@beachdognet.com>
This commit is contained in:
Hoan Luu Huu
2023-05-17 03:20:08 +07:00
committed by GitHub
parent 6933e82d46
commit 32a84b7b19
4 changed files with 196 additions and 38 deletions

View File

@@ -21,6 +21,10 @@ class RestCallSession extends CallSession {
});
this.req = req;
this.ep = ep;
// keep restDialTask reference for closing AMD
if (tasks.length) {
this.restDialTask = tasks[0];
}
this.on('callStatusChange', this._notifyCallStatusChange.bind(this));
this._notifyCallStatusChange({
@@ -44,6 +48,10 @@ class RestCallSession extends CallSession {
* This is invoked when the called party hangs up, in order to calculate the call duration.
*/
_callerHungup() {
if (this.restDialTask) {
this.logger.info('RestCallSession: releasing AMD');
this.restDialTask.turnOffAmd();
}
this.callInfo.callTerminationBy = 'caller';
const duration = moment().diff(this.dlg.connectTime, 'seconds');
this.emit('callStatusChange', {callStatus: CallStatus.Completed, duration});

View File

@@ -31,10 +31,20 @@ class TaskRestDial extends Task {
this.cs = cs;
this.canCancel = true;
if (this.data.amd) {
this.startAmd = cs.startAmd;
this.stopAmd = cs.stopAmd;
this.on('amd', this._onAmdEvent.bind(this, cs));
}
this._setCallTimer();
await this.awaitTaskDone();
}
turnOffAmd() {
if (this.callSession.ep && this.callSession.ep.amd) this.stopAmd(this.callSession.ep, this);
}
kill(cs) {
super.kill(cs);
this._clearCallTimer();
@@ -67,6 +77,13 @@ class TaskRestDial extends Task {
}
}
};
if (this.startAmd) {
try {
this.startAmd(this.callSession, this.callSession.ep, this, this.data.amd);
} catch (err) {
this.logger.info({err}, 'Rest:dial:Call established - Error calling startAmd');
}
}
const tasks = await cs.requestor.request('session:new', this.call_hook, params, httpHeaders);
if (tasks && Array.isArray(tasks)) {
this.logger.debug({tasks: tasks}, `TaskRestDial: replacing application with ${tasks.length} tasks`);
@@ -101,6 +118,15 @@ class TaskRestDial extends Task {
this.timer = null;
this.kill(this.cs);
}
_onAmdEvent(cs, evt) {
this.logger.info({evt}, 'Rest:dial:_onAmdEvent');
const {actionHook} = this.data.amd;
this.performHook(cs, actionHook, evt)
.catch((err) => {
this.logger.error({err}, 'Rest:dial:_onAmdEvent - error calling actionHook');
});
}
}
module.exports = TaskRestDial;

View File

@@ -1,9 +1,16 @@
const Emitter = require('events');
const {readFile} = require('fs');
const {
TaskName,
GoogleTranscriptionEvents,
AwsTranscriptionEvents,
AzureTranscriptionEvents,
NuanceTranscriptionEvents,
NvidiaTranscriptionEvents,
IbmTranscriptionEvents,
SonioxTranscriptionEvents,
DeepgramTranscriptionEvents,
JambonzTranscriptionEvents,
AmdEvents,
AvmdEvents
} = require('./constants');
@@ -54,6 +61,11 @@ class Amd extends Emitter {
this.thresholdWordCount = opts.thresholdWordCount || 9;
const {normalizeTranscription} = require('./transcription-utils')(logger);
this.normalizeTranscription = normalizeTranscription;
const {getNuanceAccessToken, getIbmAccessToken} = cs.srf.locals.dbHelpers;
this.getNuanceAccessToken = getNuanceAccessToken;
this.getIbmAccessToken = getIbmAccessToken;
const {setChannelVarsForStt} = require('./transcription-utils')(logger);
this.setChannelVarsForStt = setChannelVarsForStt;
const {
noSpeechTimeoutMs = 5000,
@@ -229,51 +241,92 @@ module.exports = (logger) => {
const startAmd = async(cs, ep, task, opts) => {
const amd = ep.amd = new Amd(logger, cs, opts);
const {vendor, language, sttCredentials} = amd;
const sttOpts = {};
const {vendor, language} = amd;
let sttCredentials = amd.sttCredentials;
const hints = voicemailHints[language] || [];
if (vendor === 'nuance' && sttCredentials.client_id) {
/* get nuance access token */
const {getNuanceAccessToken} = amd;
const {client_id, secret} = sttCredentials;
const {access_token, servedFromCache} = await getNuanceAccessToken(client_id, secret, 'asr tts');
logger.debug({client_id}, `Gather:exec - got nuance access token ${servedFromCache ? 'from cache' : ''}`);
sttCredentials = {...sttCredentials, access_token};
}
else if (vendor == 'ibm' && sttCredentials.stt_api_key) {
/* get ibm access token */
const {getIbmAccessToken} = amd;
const {stt_api_key, stt_region} = sttCredentials;
const {access_token, servedFromCache} = await getIbmAccessToken(stt_api_key);
logger.debug({stt_api_key}, `Gather:exec - got ibm access token ${servedFromCache ? 'from cache' : ''}`);
sttCredentials = {...sttCredentials, access_token, stt_region};
}
/* set stt options */
logger.info(`starting amd for vendor ${vendor} and language ${language}`);
if ('google' === vendor) {
sttOpts.GOOGLE_APPLICATION_CREDENTIALS = JSON.stringify(sttCredentials.credentials);
sttOpts.GOOGLE_SPEECH_USE_ENHANCED = true;
sttOpts.GOOGLE_SPEECH_HINTS = hints.join(',');
if (opts.recognizer?.altLanguages) {
sttOpts.GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = opts.recognizer.altLanguages.join(',');
}
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, onTranscription.bind(null, cs, ep, task));
ep.addCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance, onEndOfUtterance.bind(null, cs, ep, task));
}
else if (['aws', 'polly'].includes(vendor)) {
Object.assign(sttOpts, {
AWS_ACCESS_KEY_ID: sttCredentials.accessKeyId,
AWS_SECRET_ACCESS_KEY: sttCredentials.secretAccessKey,
AWS_REGION: sttCredentials.region
});
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, onTranscription.bind(null, cs, ep, task));
}
else if ('microsoft' === vendor) {
Object.assign(sttOpts, {
'AZURE_SUBSCRIPTION_KEY': sttCredentials.api_key,
'AZURE_REGION': sttCredentials.region
});
sttOpts.AZURE_SPEECH_HINTS = hints.join(',');
if (opts.recognizer?.altLanguages) {
sttOpts.AZURE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = opts.recognizer.altLanguages.join(',');
}
sttOpts.AZURE_INITIAL_SPEECH_TIMEOUT_MS = opts.resolveTimeoutMs || 20000;
const sttOpts = amd.setChannelVarsForStt({name: TaskName.Gather}, sttCredentials, {
vendor,
hints,
enhancedModel: true,
altLanguages: opts.recognizer?.altLanguages || [],
initialSpeechTimeoutMs: opts.resolveTimeoutMs,
});
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, onTranscription.bind(null, cs, ep, task));
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, onNoSpeechDetected.bind(null, cs, ep, task));
}
await ep.set(sttOpts).catch((err) => logger.info(err, 'Error setting channel variables'));
amd.transcriptionHandler = onTranscription.bind(null, cs, ep, task);
amd.EndOfUtteranceHandler = onEndOfUtterance.bind(null, cs, ep, task);
amd.noSpeechHandler = onNoSpeechDetected.bind(null, cs, ep, task);
/*
 * Attach the AMD handlers to the endpoint's vendor-specific transcription events.
 * The handlers are the ones stored on the amd instance so that the same function
 * references can later be passed to removeListener.
 */
switch (vendor) {
  case 'google':
    ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, amd.transcriptionHandler);
    ep.addCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance, amd.EndOfUtteranceHandler);
    break;
  case 'aws':
  case 'polly':
    ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, amd.transcriptionHandler);
    break;
  case 'microsoft':
    ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, amd.transcriptionHandler);
    ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, amd.noSpeechHandler);
    break;
  case 'nuance':
    ep.addCustomEventListener(NuanceTranscriptionEvents.Transcription, amd.transcriptionHandler);
    break;
  case 'deepgram':
    ep.addCustomEventListener(DeepgramTranscriptionEvents.Transcription, amd.transcriptionHandler);
    break;
  case 'soniox':
    amd.bugname = 'soniox_amd_transcribe';
    ep.addCustomEventListener(SonioxTranscriptionEvents.Transcription, amd.transcriptionHandler);
    break;
  case 'ibm':
    ep.addCustomEventListener(IbmTranscriptionEvents.Transcription, amd.transcriptionHandler);
    break;
  case 'nvidia':
    ep.addCustomEventListener(NvidiaTranscriptionEvents.Transcription, amd.transcriptionHandler);
    break;
  default:
    // Anything that is not a built-in vendor must be a 'custom:<name>' vendor.
    if (!vendor?.startsWith('custom:')) {
      // Report the local `vendor`; `this` is not the Amd instance here, so
      // `this.vendor` would always render as "undefined".
      throw new Error(`Invalid vendor ${vendor}`);
    }
    ep.addCustomEventListener(JambonzTranscriptionEvents.Transcription, amd.transcriptionHandler);
    break;
}
amd
.on(AmdEvents.NoSpeechDetected, (evt) => {
task.emit('amd', {type: AmdEvents.NoSpeechDetected, ...evt});
try {
ep.connected && ep.stopTranscription({vendor, bugname});
stopAmd(ep, task);
} catch (err) {
logger.info({err}, 'Error stopping transcription');
}
@@ -281,7 +334,7 @@ module.exports = (logger) => {
.on(AmdEvents.HumanDetected, (evt) => {
task.emit('amd', {type: AmdEvents.HumanDetected, ...evt});
try {
ep.connected && ep.stopTranscription({vendor, bugname});
stopAmd(ep, task);
} catch (err) {
logger.info({err}, 'Error stopping transcription');
}
@@ -292,7 +345,7 @@ module.exports = (logger) => {
.on(AmdEvents.DecisionTimeout, (evt) => {
task.emit('amd', {type: AmdEvents.DecisionTimeout, ...evt});
try {
ep.connected && ep.stopTranscription({vendor, bugname});
stopAmd(ep, task);
} catch (err) {
logger.info({err}, 'Error stopping transcription');
}
@@ -300,7 +353,7 @@ module.exports = (logger) => {
.on(AmdEvents.ToneTimeout, (evt) => {
//task.emit('amd', {type: AmdEvents.ToneTimeout, ...evt});
try {
ep.connected && ep.execute('avmd_stop').catch((err) => logger.info(err, 'Error stopping avmd'));
stopAmd(ep, task);
} catch (err) {
logger.info({err}, 'Error stopping avmd');
}
@@ -308,7 +361,7 @@ module.exports = (logger) => {
.on(AmdEvents.MachineStoppedSpeaking, () => {
task.emit('amd', {type: AmdEvents.MachineStoppedSpeaking});
try {
ep.connected && ep.stopTranscription({vendor, bugname});
stopAmd(ep, task);
} catch (err) {
logger.info({err}, 'Error stopping transcription');
}
@@ -327,6 +380,19 @@ module.exports = (logger) => {
if (ep.amd) {
vendor = ep.amd.vendor;
ep.amd.stopAllTimers();
ep.removeListener(GoogleTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
ep.removeListener(GoogleTranscriptionEvents.EndOfUtterance, ep.amd.EndOfUtteranceHandler);
ep.removeListener(AwsTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
ep.removeListener(AzureTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
ep.removeListener(AzureTranscriptionEvents.NoSpeechDetected, ep.amd.noSpeechHandler);
ep.removeListener(NuanceTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
ep.removeListener(DeepgramTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
ep.removeListener(SonioxTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
ep.removeListener(IbmTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
ep.removeListener(NvidiaTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
ep.removeListener(JambonzTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
ep.amd = null;
}