feat: rest:dial amd (#339)

Add support for sending the 'amd' property in the createCall REST API, and add support for using any of the supported speech vendors for STT during answering machine detection
---------

Co-authored-by: Dave Horton <daveh@beachdognet.com>
Authored by: Hoan Luu Huu
Date: 2023-05-17 03:20:08 +07:00
Committed by: GitHub
Parent: 6933e82d46
Commit: 32a84b7b19
4 changed files with 196 additions and 38 deletions
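For reference, a minimal sketch of a createCall request exercising the new 'amd' property, using the same values as the test at the end of this commit (illustrative only, not a complete API reference; basic-auth credentials on the call_hook are omitted):

// POST http://127.0.0.1:3000/v1/createCall
// AMD results are delivered as webhooks to amd.actionHook
const body = {
  account_sid: 'bb845d4b-83a9-4cde-a6e9-50f3743bab3f',
  call_hook: {url: 'http://127.0.0.1:3100/', method: 'POST'},
  from: 'create-call-amd',
  to: {type: 'phone', number: '15583084809'},
  amd: {actionHook: '/actionHook'},
  speech_recognizer_vendor: 'google',      // any supported STT vendor may be used
  speech_recognizer_language: 'en'
};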


@@ -21,6 +21,10 @@ class RestCallSession extends CallSession {
});
this.req = req;
this.ep = ep;
// keep restDialTask reference for closing AMD
if (tasks.length) {
this.restDialTask = tasks[0];
}
this.on('callStatusChange', this._notifyCallStatusChange.bind(this));
this._notifyCallStatusChange({
@@ -44,6 +48,10 @@ class RestCallSession extends CallSession {
* This is invoked when the called party hangs up, in order to calculate the call duration.
*/
_callerHungup() {
if (this.restDialTask) {
this.logger.info('RestCallSession: releasing AMD');
this.restDialTask.turnOffAmd();
}
this.callInfo.callTerminationBy = 'caller';
const duration = moment().diff(this.dlg.connectTime, 'seconds');
this.emit('callStatusChange', {callStatus: CallStatus.Completed, duration});


@@ -31,10 +31,20 @@ class TaskRestDial extends Task {
this.cs = cs;
this.canCancel = true;
if (this.data.amd) {
this.startAmd = cs.startAmd;
this.stopAmd = cs.stopAmd;
this.on('amd', this._onAmdEvent.bind(this, cs));
}
this._setCallTimer();
await this.awaitTaskDone();
}
turnOffAmd() {
if (this.callSession.ep && this.callSession.ep.amd) this.stopAmd(this.callSession.ep, this);
}
kill(cs) {
super.kill(cs);
this._clearCallTimer();
@@ -67,6 +77,13 @@ class TaskRestDial extends Task {
}
}
};
if (this.startAmd) {
try {
this.startAmd(this.callSession, this.callSession.ep, this, this.data.amd);
} catch (err) {
this.logger.info({err}, 'Rest:dial:Call established - Error calling startAmd');
}
}
const tasks = await cs.requestor.request('session:new', this.call_hook, params, httpHeaders);
if (tasks && Array.isArray(tasks)) {
this.logger.debug({tasks: tasks}, `TaskRestDial: replacing application with ${tasks.length} tasks`);
@@ -101,6 +118,15 @@ class TaskRestDial extends Task {
this.timer = null;
this.kill(this.cs);
}
_onAmdEvent(cs, evt) {
this.logger.info({evt}, 'Rest:dial:_onAmdEvent');
const {actionHook} = this.data.amd;
this.performHook(cs, actionHook, evt)
.catch((err) => {
this.logger.error({err}, 'Rest:dial:_onAmdEvent - error calling actionHook');
});
}
}
module.exports = TaskRestDial;
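To show where the AMD result from _onAmdEvent ends up: the event is POSTed to the amd.actionHook supplied in the createCall request. A minimal sketch of a receiving webhook app follows (Express and port 3100 are assumed here for illustration; the event body carries a type such as 'amd_no_speech_detected', as asserted in the test below):

const express = require('express');
const app = express();
app.use(express.json());

// Receives AMD events sent by rest:dial's _onAmdEvent -> performHook
app.post('/actionHook', (req, res) => {
  const {type} = req.body;   // e.g. 'amd_human_detected' or 'amd_no_speech_detected'
  console.log(`amd event: ${type}`);
  res.json([]);              // empty verb list; return new verbs to change the call flow
});

app.listen(3100);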


@@ -1,9 +1,16 @@
const Emitter = require('events');
const {readFile} = require('fs');
const {
TaskName,
GoogleTranscriptionEvents,
AwsTranscriptionEvents,
AzureTranscriptionEvents,
NuanceTranscriptionEvents,
NvidiaTranscriptionEvents,
IbmTranscriptionEvents,
SonioxTranscriptionEvents,
DeepgramTranscriptionEvents,
JambonzTranscriptionEvents,
AmdEvents,
AvmdEvents
} = require('./constants');
@@ -54,6 +61,11 @@ class Amd extends Emitter {
this.thresholdWordCount = opts.thresholdWordCount || 9;
const {normalizeTranscription} = require('./transcription-utils')(logger);
this.normalizeTranscription = normalizeTranscription;
const {getNuanceAccessToken, getIbmAccessToken} = cs.srf.locals.dbHelpers;
this.getNuanceAccessToken = getNuanceAccessToken;
this.getIbmAccessToken = getIbmAccessToken;
const {setChannelVarsForStt} = require('./transcription-utils')(logger);
this.setChannelVarsForStt = setChannelVarsForStt;
const {
noSpeechTimeoutMs = 5000,
@@ -229,51 +241,92 @@ module.exports = (logger) => {
const startAmd = async(cs, ep, task, opts) => {
const amd = ep.amd = new Amd(logger, cs, opts);
- const {vendor, language, sttCredentials} = amd;
- const sttOpts = {};
const {vendor, language} = amd;
let sttCredentials = amd.sttCredentials;
const hints = voicemailHints[language] || [];
if (vendor === 'nuance' && sttCredentials.client_id) {
/* get nuance access token */
const {getNuanceAccessToken} = amd;
const {client_id, secret} = sttCredentials;
const {access_token, servedFromCache} = await getNuanceAccessToken(client_id, secret, 'asr tts');
logger.debug({client_id}, `Gather:exec - got nuance access token ${servedFromCache ? 'from cache' : ''}`);
sttCredentials = {...sttCredentials, access_token};
}
else if (vendor == 'ibm' && sttCredentials.stt_api_key) {
/* get ibm access token */
const {getIbmAccessToken} = amd;
const {stt_api_key, stt_region} = sttCredentials;
const {access_token, servedFromCache} = await getIbmAccessToken(stt_api_key);
logger.debug({stt_api_key}, `Gather:exec - got ibm access token ${servedFromCache ? 'from cache' : ''}`);
sttCredentials = {...sttCredentials, access_token, stt_region};
}
/* set stt options */
logger.info(`starting amd for vendor ${vendor} and language ${language}`);
- if ('google' === vendor) {
- sttOpts.GOOGLE_APPLICATION_CREDENTIALS = JSON.stringify(sttCredentials.credentials);
- sttOpts.GOOGLE_SPEECH_USE_ENHANCED = true;
- sttOpts.GOOGLE_SPEECH_HINTS = hints.join(',');
- if (opts.recognizer?.altLanguages) {
- sttOpts.GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = opts.recognizer.altLanguages.join(',');
- }
- ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, onTranscription.bind(null, cs, ep, task));
- ep.addCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance, onEndOfUtterance.bind(null, cs, ep, task));
- }
- else if (['aws', 'polly'].includes(vendor)) {
- Object.assign(sttOpts, {
- AWS_ACCESS_KEY_ID: sttCredentials.accessKeyId,
- AWS_SECRET_ACCESS_KEY: sttCredentials.secretAccessKey,
- AWS_REGION: sttCredentials.region
- });
- ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, onTranscription.bind(null, cs, ep, task));
- }
- else if ('microsoft' === vendor) {
- Object.assign(sttOpts, {
- 'AZURE_SUBSCRIPTION_KEY': sttCredentials.api_key,
- 'AZURE_REGION': sttCredentials.region
- });
- sttOpts.AZURE_SPEECH_HINTS = hints.join(',');
- if (opts.recognizer?.altLanguages) {
- sttOpts.AZURE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = opts.recognizer.altLanguages.join(',');
- }
- sttOpts.AZURE_INITIAL_SPEECH_TIMEOUT_MS = opts.resolveTimeoutMs || 20000;
const sttOpts = amd.setChannelVarsForStt({name: TaskName.Gather}, sttCredentials, {
vendor,
hints,
enhancedModel: true,
altLanguages: opts.recognizer?.altLanguages || [],
initialSpeechTimeoutMs: opts.resolveTimeoutMs,
});
- ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, onTranscription.bind(null, cs, ep, task));
- ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, onNoSpeechDetected.bind(null, cs, ep, task));
- }
await ep.set(sttOpts).catch((err) => logger.info(err, 'Error setting channel variables'));
amd.transcriptionHandler = onTranscription.bind(null, cs, ep, task);
amd.EndOfUtteranceHandler = onEndOfUtterance.bind(null, cs, ep, task);
amd.noSpeechHandler = onNoSpeechDetected.bind(null, cs, ep, task);
switch (vendor) {
case 'google':
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, amd.transcriptionHandler);
ep.addCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance, amd.EndOfUtteranceHandler);
break;
case 'aws':
case 'polly':
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, amd.transcriptionHandler);
break;
case 'microsoft':
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, amd.transcriptionHandler);
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, amd.noSpeechHandler);
break;
case 'nuance':
ep.addCustomEventListener(NuanceTranscriptionEvents.Transcription, amd.transcriptionHandler);
break;
case 'deepgram':
ep.addCustomEventListener(DeepgramTranscriptionEvents.Transcription, amd.transcriptionHandler);
break;
case 'soniox':
amd.bugname = 'soniox_amd_transcribe';
ep.addCustomEventListener(SonioxTranscriptionEvents.Transcription, amd.transcriptionHandler);
break;
case 'ibm':
ep.addCustomEventListener(IbmTranscriptionEvents.Transcription, amd.transcriptionHandler);
break;
case 'nvidia':
ep.addCustomEventListener(NvidiaTranscriptionEvents.Transcription, amd.transcriptionHandler);
break;
default:
if (vendor.startsWith('custom:')) {
ep.addCustomEventListener(JambonzTranscriptionEvents.Transcription, amd.transcriptionHandler);
break;
}
else {
throw new Error(`Invalid vendor ${vendor}`);
}
}
amd
.on(AmdEvents.NoSpeechDetected, (evt) => {
task.emit('amd', {type: AmdEvents.NoSpeechDetected, ...evt});
try {
- ep.connected && ep.stopTranscription({vendor, bugname});
stopAmd(ep, task);
} catch (err) {
logger.info({err}, 'Error stopping transcription');
}
@@ -281,7 +334,7 @@ module.exports = (logger) => {
.on(AmdEvents.HumanDetected, (evt) => {
task.emit('amd', {type: AmdEvents.HumanDetected, ...evt});
try {
- ep.connected && ep.stopTranscription({vendor, bugname});
stopAmd(ep, task);
} catch (err) {
logger.info({err}, 'Error stopping transcription');
}
@@ -292,7 +345,7 @@ module.exports = (logger) => {
.on(AmdEvents.DecisionTimeout, (evt) => {
task.emit('amd', {type: AmdEvents.DecisionTimeout, ...evt});
try {
- ep.connected && ep.stopTranscription({vendor, bugname});
stopAmd(ep, task);
} catch (err) {
logger.info({err}, 'Error stopping transcription');
}
@@ -300,7 +353,7 @@ module.exports = (logger) => {
.on(AmdEvents.ToneTimeout, (evt) => {
//task.emit('amd', {type: AmdEvents.ToneTimeout, ...evt});
try {
- ep.connected && ep.execute('avmd_stop').catch((err) => logger.info(err, 'Error stopping avmd'));
stopAmd(ep, task);
} catch (err) {
logger.info({err}, 'Error stopping avmd');
}
@@ -308,7 +361,7 @@ module.exports = (logger) => {
.on(AmdEvents.MachineStoppedSpeaking, () => {
task.emit('amd', {type: AmdEvents.MachineStoppedSpeaking});
try {
- ep.connected && ep.stopTranscription({vendor, bugname});
stopAmd(ep, task);
} catch (err) {
logger.info({err}, 'Error stopping transcription');
}
@@ -327,6 +380,19 @@ module.exports = (logger) => {
if (ep.amd) {
vendor = ep.amd.vendor;
ep.amd.stopAllTimers();
ep.removeListener(GoogleTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
ep.removeListener(GoogleTranscriptionEvents.EndOfUtterance, ep.amd.EndOfUtteranceHandler);
ep.removeListener(AwsTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
ep.removeListener(AzureTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
ep.removeListener(AzureTranscriptionEvents.NoSpeechDetected, ep.amd.noSpeechHandler);
ep.removeListener(NuanceTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
ep.removeListener(DeepgramTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
ep.removeListener(SonioxTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
ep.removeListener(IbmTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
ep.removeListener(NvidiaTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
ep.removeListener(JambonzTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
ep.amd = null;
}


@@ -106,3 +106,61 @@ test('test create-call call-hook basic authentication', async(t) => {
t.error(err);
}
});
test('test create-call amd', async(t) => {
clearModule.all();
const {srf, disconnect} = require('../app');
try {
await connect(srf);
// GIVEN
let from = 'create-call-amd';
let account_sid = 'bb845d4b-83a9-4cde-a6e9-50f3743bab3f';
// Give UAS app time to come up
const p = sippUac('uas.xml', '172.38.0.10', from);
await waitFor(1000);
const post = bent('http://127.0.0.1:3000/', 'POST', 'json', 201);
post('v1/createCall', {
'account_sid':account_sid,
"call_hook": {
"url": "http://127.0.0.1:3100/",
"method": "POST",
"username": "username",
"password": "password"
},
"from": from,
"to": {
"type": "phone",
"number": "15583084809"
},
"amd": {
"actionHook": "/actionHook"
},
"speech_recognizer_vendor": "google",
"speech_recognizer_language": "en"
});
let verbs = [
{
"verb": "pause",
"length": 7
}
];
provisionCallHook(from, verbs);
// THEN
await p;
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
t.ok(obj.body.type === 'amd_no_speech_detected',
'create-call: AMD detected');
disconnect();
} catch (err) {
console.log(`error received: ${err}`);
disconnect();
t.error(err);
}
});