mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-19 04:17:44 +00:00
feat: rest:dial amd (#339)
Add support for sending the 'amd' property in the createCall REST API, and add support for using any of the speech vendors for STT. --------- Co-authored-by: Dave Horton <daveh@beachdognet.com>
This commit is contained in:
@@ -21,6 +21,10 @@ class RestCallSession extends CallSession {
|
||||
});
|
||||
this.req = req;
|
||||
this.ep = ep;
|
||||
// keep restDialTask reference for closing AMD
|
||||
if (tasks.length) {
|
||||
this.restDialTask = tasks[0];
|
||||
}
|
||||
|
||||
this.on('callStatusChange', this._notifyCallStatusChange.bind(this));
|
||||
this._notifyCallStatusChange({
|
||||
@@ -44,6 +48,10 @@ class RestCallSession extends CallSession {
|
||||
* This is invoked when the called party hangs up, in order to calculate the call duration.
|
||||
*/
|
||||
_callerHungup() {
|
||||
if (this.restDialTask) {
|
||||
this.logger.info('RestCallSession: releasing AMD');
|
||||
this.restDialTask.turnOffAmd();
|
||||
}
|
||||
this.callInfo.callTerminationBy = 'caller';
|
||||
const duration = moment().diff(this.dlg.connectTime, 'seconds');
|
||||
this.emit('callStatusChange', {callStatus: CallStatus.Completed, duration});
|
||||
|
||||
@@ -31,10 +31,20 @@ class TaskRestDial extends Task {
|
||||
this.cs = cs;
|
||||
this.canCancel = true;
|
||||
|
||||
if (this.data.amd) {
|
||||
this.startAmd = cs.startAmd;
|
||||
this.stopAmd = cs.stopAmd;
|
||||
this.on('amd', this._onAmdEvent.bind(this, cs));
|
||||
}
|
||||
|
||||
this._setCallTimer();
|
||||
await this.awaitTaskDone();
|
||||
}
|
||||
|
||||
turnOffAmd() {
|
||||
if (this.callSession.ep && this.callSession.ep.amd) this.stopAmd(this.callSession.ep, this);
|
||||
}
|
||||
|
||||
kill(cs) {
|
||||
super.kill(cs);
|
||||
this._clearCallTimer();
|
||||
@@ -67,6 +77,13 @@ class TaskRestDial extends Task {
|
||||
}
|
||||
}
|
||||
};
|
||||
if (this.startAmd) {
|
||||
try {
|
||||
this.startAmd(this.callSession, this.callSession.ep, this, this.data.amd);
|
||||
} catch (err) {
|
||||
this.logger.info({err}, 'Rest:dial:Call established - Error calling startAmd');
|
||||
}
|
||||
}
|
||||
const tasks = await cs.requestor.request('session:new', this.call_hook, params, httpHeaders);
|
||||
if (tasks && Array.isArray(tasks)) {
|
||||
this.logger.debug({tasks: tasks}, `TaskRestDial: replacing application with ${tasks.length} tasks`);
|
||||
@@ -101,6 +118,15 @@ class TaskRestDial extends Task {
|
||||
this.timer = null;
|
||||
this.kill(this.cs);
|
||||
}
|
||||
|
||||
_onAmdEvent(cs, evt) {
|
||||
this.logger.info({evt}, 'Rest:dial:_onAmdEvent');
|
||||
const {actionHook} = this.data.amd;
|
||||
this.performHook(cs, actionHook, evt)
|
||||
.catch((err) => {
|
||||
this.logger.error({err}, 'Rest:dial:_onAmdEvent - error calling actionHook');
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = TaskRestDial;
|
||||
|
||||
@@ -1,9 +1,16 @@
|
||||
const Emitter = require('events');
|
||||
const {readFile} = require('fs');
|
||||
const {
|
||||
TaskName,
|
||||
GoogleTranscriptionEvents,
|
||||
AwsTranscriptionEvents,
|
||||
AzureTranscriptionEvents,
|
||||
NuanceTranscriptionEvents,
|
||||
NvidiaTranscriptionEvents,
|
||||
IbmTranscriptionEvents,
|
||||
SonioxTranscriptionEvents,
|
||||
DeepgramTranscriptionEvents,
|
||||
JambonzTranscriptionEvents,
|
||||
AmdEvents,
|
||||
AvmdEvents
|
||||
} = require('./constants');
|
||||
@@ -54,6 +61,11 @@ class Amd extends Emitter {
|
||||
this.thresholdWordCount = opts.thresholdWordCount || 9;
|
||||
const {normalizeTranscription} = require('./transcription-utils')(logger);
|
||||
this.normalizeTranscription = normalizeTranscription;
|
||||
const {getNuanceAccessToken, getIbmAccessToken} = cs.srf.locals.dbHelpers;
|
||||
this.getNuanceAccessToken = getNuanceAccessToken;
|
||||
this.getIbmAccessToken = getIbmAccessToken;
|
||||
const {setChannelVarsForStt} = require('./transcription-utils')(logger);
|
||||
this.setChannelVarsForStt = setChannelVarsForStt;
|
||||
|
||||
const {
|
||||
noSpeechTimeoutMs = 5000,
|
||||
@@ -229,51 +241,92 @@ module.exports = (logger) => {
|
||||
|
||||
const startAmd = async(cs, ep, task, opts) => {
|
||||
const amd = ep.amd = new Amd(logger, cs, opts);
|
||||
const {vendor, language, sttCredentials} = amd;
|
||||
const sttOpts = {};
|
||||
const {vendor, language} = amd;
|
||||
let sttCredentials = amd.sttCredentials;
|
||||
const hints = voicemailHints[language] || [];
|
||||
|
||||
if (vendor === 'nuance' && sttCredentials.client_id) {
|
||||
/* get nuance access token */
|
||||
const {getNuanceAccessToken} = amd;
|
||||
const {client_id, secret} = sttCredentials;
|
||||
const {access_token, servedFromCache} = await getNuanceAccessToken(client_id, secret, 'asr tts');
|
||||
logger.debug({client_id}, `Gather:exec - got nuance access token ${servedFromCache ? 'from cache' : ''}`);
|
||||
sttCredentials = {...sttCredentials, access_token};
|
||||
}
|
||||
else if (vendor == 'ibm' && sttCredentials.stt_api_key) {
|
||||
/* get ibm access token */
|
||||
const {getIbmAccessToken} = amd;
|
||||
const {stt_api_key, stt_region} = sttCredentials;
|
||||
const {access_token, servedFromCache} = await getIbmAccessToken(stt_api_key);
|
||||
logger.debug({stt_api_key}, `Gather:exec - got ibm access token ${servedFromCache ? 'from cache' : ''}`);
|
||||
sttCredentials = {...sttCredentials, access_token, stt_region};
|
||||
}
|
||||
|
||||
/* set stt options */
|
||||
logger.info(`starting amd for vendor ${vendor} and language ${language}`);
|
||||
if ('google' === vendor) {
|
||||
sttOpts.GOOGLE_APPLICATION_CREDENTIALS = JSON.stringify(sttCredentials.credentials);
|
||||
sttOpts.GOOGLE_SPEECH_USE_ENHANCED = true;
|
||||
sttOpts.GOOGLE_SPEECH_HINTS = hints.join(',');
|
||||
if (opts.recognizer?.altLanguages) {
|
||||
sttOpts.GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = opts.recognizer.altLanguages.join(',');
|
||||
}
|
||||
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, onTranscription.bind(null, cs, ep, task));
|
||||
ep.addCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance, onEndOfUtterance.bind(null, cs, ep, task));
|
||||
}
|
||||
else if (['aws', 'polly'].includes(vendor)) {
|
||||
Object.assign(sttOpts, {
|
||||
AWS_ACCESS_KEY_ID: sttCredentials.accessKeyId,
|
||||
AWS_SECRET_ACCESS_KEY: sttCredentials.secretAccessKey,
|
||||
AWS_REGION: sttCredentials.region
|
||||
});
|
||||
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, onTranscription.bind(null, cs, ep, task));
|
||||
}
|
||||
else if ('microsoft' === vendor) {
|
||||
Object.assign(sttOpts, {
|
||||
'AZURE_SUBSCRIPTION_KEY': sttCredentials.api_key,
|
||||
'AZURE_REGION': sttCredentials.region
|
||||
});
|
||||
sttOpts.AZURE_SPEECH_HINTS = hints.join(',');
|
||||
if (opts.recognizer?.altLanguages) {
|
||||
sttOpts.AZURE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = opts.recognizer.altLanguages.join(',');
|
||||
}
|
||||
sttOpts.AZURE_INITIAL_SPEECH_TIMEOUT_MS = opts.resolveTimeoutMs || 20000;
|
||||
const sttOpts = amd.setChannelVarsForStt({name: TaskName.Gather}, sttCredentials, {
|
||||
vendor,
|
||||
hints,
|
||||
enhancedModel: true,
|
||||
altLanguages: opts.recognizer?.altLanguages || [],
|
||||
initialSpeechTimeoutMs: opts.resolveTimeoutMs,
|
||||
});
|
||||
|
||||
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, onTranscription.bind(null, cs, ep, task));
|
||||
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, onNoSpeechDetected.bind(null, cs, ep, task));
|
||||
}
|
||||
await ep.set(sttOpts).catch((err) => logger.info(err, 'Error setting channel variables'));
|
||||
|
||||
amd.transcriptionHandler = onTranscription.bind(null, cs, ep, task);
|
||||
amd.EndOfUtteranceHandler = onEndOfUtterance.bind(null, cs, ep, task);
|
||||
amd.noSpeechHandler = onNoSpeechDetected.bind(null, cs, ep, task);
|
||||
|
||||
switch (vendor) {
|
||||
case 'google':
|
||||
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, amd.transcriptionHandler);
|
||||
ep.addCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance, amd.EndOfUtteranceHandler);
|
||||
break;
|
||||
|
||||
case 'aws':
|
||||
case 'polly':
|
||||
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, amd.transcriptionHandler);
|
||||
break;
|
||||
case 'microsoft':
|
||||
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, amd.transcriptionHandler);
|
||||
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, amd.noSpeechHandler);
|
||||
break;
|
||||
case 'nuance':
|
||||
ep.addCustomEventListener(NuanceTranscriptionEvents.Transcription, amd.transcriptionHandler);
|
||||
break;
|
||||
|
||||
case 'deepgram':
|
||||
ep.addCustomEventListener(DeepgramTranscriptionEvents.Transcription, amd.transcriptionHandler);
|
||||
break;
|
||||
|
||||
case 'soniox':
|
||||
amd.bugname = 'soniox_amd_transcribe';
|
||||
ep.addCustomEventListener(SonioxTranscriptionEvents.Transcription, amd.transcriptionHandler);
|
||||
break;
|
||||
|
||||
case 'ibm':
|
||||
ep.addCustomEventListener(IbmTranscriptionEvents.Transcription, amd.transcriptionHandler);
|
||||
break;
|
||||
|
||||
case 'nvidia':
|
||||
ep.addCustomEventListener(NvidiaTranscriptionEvents.Transcription, amd.transcriptionHandler);
|
||||
break;
|
||||
|
||||
default:
|
||||
if (vendor.startsWith('custom:')) {
|
||||
ep.addCustomEventListener(JambonzTranscriptionEvents.Transcription, amd.transcriptionHandler);
|
||||
break;
|
||||
}
|
||||
else {
|
||||
throw new Error(`Invalid vendor ${this.vendor}`);
|
||||
}
|
||||
}
|
||||
amd
|
||||
.on(AmdEvents.NoSpeechDetected, (evt) => {
|
||||
task.emit('amd', {type: AmdEvents.NoSpeechDetected, ...evt});
|
||||
try {
|
||||
ep.connected && ep.stopTranscription({vendor, bugname});
|
||||
stopAmd(ep, task);
|
||||
} catch (err) {
|
||||
logger.info({err}, 'Error stopping transcription');
|
||||
}
|
||||
@@ -281,7 +334,7 @@ module.exports = (logger) => {
|
||||
.on(AmdEvents.HumanDetected, (evt) => {
|
||||
task.emit('amd', {type: AmdEvents.HumanDetected, ...evt});
|
||||
try {
|
||||
ep.connected && ep.stopTranscription({vendor, bugname});
|
||||
stopAmd(ep, task);
|
||||
} catch (err) {
|
||||
logger.info({err}, 'Error stopping transcription');
|
||||
}
|
||||
@@ -292,7 +345,7 @@ module.exports = (logger) => {
|
||||
.on(AmdEvents.DecisionTimeout, (evt) => {
|
||||
task.emit('amd', {type: AmdEvents.DecisionTimeout, ...evt});
|
||||
try {
|
||||
ep.connected && ep.stopTranscription({vendor, bugname});
|
||||
stopAmd(ep, task);
|
||||
} catch (err) {
|
||||
logger.info({err}, 'Error stopping transcription');
|
||||
}
|
||||
@@ -300,7 +353,7 @@ module.exports = (logger) => {
|
||||
.on(AmdEvents.ToneTimeout, (evt) => {
|
||||
//task.emit('amd', {type: AmdEvents.ToneTimeout, ...evt});
|
||||
try {
|
||||
ep.connected && ep.execute('avmd_stop').catch((err) => logger.info(err, 'Error stopping avmd'));
|
||||
stopAmd(ep, task);
|
||||
} catch (err) {
|
||||
logger.info({err}, 'Error stopping avmd');
|
||||
}
|
||||
@@ -308,7 +361,7 @@ module.exports = (logger) => {
|
||||
.on(AmdEvents.MachineStoppedSpeaking, () => {
|
||||
task.emit('amd', {type: AmdEvents.MachineStoppedSpeaking});
|
||||
try {
|
||||
ep.connected && ep.stopTranscription({vendor, bugname});
|
||||
stopAmd(ep, task);
|
||||
} catch (err) {
|
||||
logger.info({err}, 'Error stopping transcription');
|
||||
}
|
||||
@@ -327,6 +380,19 @@ module.exports = (logger) => {
|
||||
if (ep.amd) {
|
||||
vendor = ep.amd.vendor;
|
||||
ep.amd.stopAllTimers();
|
||||
|
||||
ep.removeListener(GoogleTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
|
||||
ep.removeListener(GoogleTranscriptionEvents.EndOfUtterance, ep.amd.EndOfUtteranceHandler);
|
||||
ep.removeListener(AwsTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
|
||||
ep.removeListener(AzureTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
|
||||
ep.removeListener(AzureTranscriptionEvents.NoSpeechDetected, ep.amd.noSpeechHandler);
|
||||
ep.removeListener(NuanceTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
|
||||
ep.removeListener(DeepgramTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
|
||||
ep.removeListener(SonioxTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
|
||||
ep.removeListener(IbmTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
|
||||
ep.removeListener(NvidiaTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
|
||||
ep.removeListener(JambonzTranscriptionEvents.Transcription, ep.amd.transcriptionHandler);
|
||||
|
||||
ep.amd = null;
|
||||
}
|
||||
|
||||
|
||||
@@ -106,3 +106,61 @@ test('test create-call call-hook basic authentication', async(t) => {
|
||||
t.error(err);
|
||||
}
|
||||
});
|
||||
|
||||
/**
 * Integration test: createCall with an `amd` property.
 * Places an outbound call through the REST API toward a SIPp UAS scenario,
 * supplies a `pause` verb as the application, and then checks that the
 * AMD actionHook was invoked with an `amd_no_speech_detected` event.
 */
test('test create-call amd', async(t) => {
  clearModule.all();
  const {srf, disconnect} = require('../app');

  try {
    await connect(srf);

    // GIVEN
    const from = 'create-call-amd';
    const account_sid = 'bb845d4b-83a9-4cde-a6e9-50f3743bab3f';

    // Give UAS app time to come up
    const p = sippUac('uas.xml', '172.38.0.10', from);
    await waitFor(1000);

    const post = bent('http://127.0.0.1:3000/', 'POST', 'json', 201);
    // NOTE(review): the POST is intentionally not awaited here, as in the original;
    // presumably the call hook must be provisioned while the call is being set up — confirm.
    post('v1/createCall', {
      'account_sid': account_sid,
      "call_hook": {
        "url": "http://127.0.0.1:3100/",
        "method": "POST",
        "username": "username",
        "password": "password"
      },
      "from": from,
      "to": {
        "type": "phone",
        "number": "15583084809"
      },
      "amd": {
        "actionHook": "/actionHook"
      },
      "speech_recognizer_vendor": "google",
      "speech_recognizer_language": "en"
    });

    const verbs = [
      {
        "verb": "pause",
        "length": 7
      }
    ];
    provisionCallHook(from, verbs);

    // THEN
    await p;

    const obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
    // compare (not assign): an assignment here is always truthy and would never fail
    t.ok(obj.body.type === 'amd_no_speech_detected',
      'create-call: AMD detected');
    disconnect();
  } catch (err) {
    console.log(`error received: ${err}`);
    disconnect();
    t.error(err);
  }
});
|
||||
|
||||
Reference in New Issue
Block a user